# ---------------------------------------------------------
# TUM - Technichal University of Munich
#
# Original Authors: Taylor Jones
#                   Magdalena Altmann
# Further Authors:  Adrian
#                   Friedrich Klappenbach
# Date: 2020-12-06
# Purpose: This code is only to start the inversion in from
#          the campaign  folder with the according 
#          parameters.
# 
# Usage:
#     Campaign Folder that contains:
#       observations sub folder (\obs)
#       inventories sub folder (\inventories)
#       footprints sub folder (\foot)
#       Run_Inversion.R (this File)
#     execute Run_Inversion.R after adjusting parameters accordingly
# ---------------------------------------------------------

rm(list = ls(all.names = TRUE))  # clear R environment (clear workspace)

# this is the path to the inverse modeling engine including \\ at the end. 
path_inverse_modeling_engine <- paste0(dirname(rstudioapi::getSourceEditorContext()$path),"/../inverse_modeling_engine/")
# path_inverse_modeling_engine <- "C:/Path/to/inverse_modeling_engine/"

# shapes of administrative regions:
path_geojson<-"gemeinden_simplify200.geojson"

# Campaign prefix:
prefix <- 'INDY'

# Select dates that should be evaluated: e.g. 20180827 for Aug. 27th in 2018
# multiple dates possible: dates <- c(20180818, 20180820)
dates <- c(20160513,20160515,20160518,20160519,20160522)
#dates <- c(20160513) #,20160515)
# browser() ## a break point inserted here

# Select station names that should be evaluated from folder "obs\XXYYMMDD.csv"
# XX denotes prefix (e.g. MUC for munich)
ems     <- c('ha','hb','kf','pi')

#where the ems were: 
#ToDo: Fetch data automatically
em_lats <- c(39.827, 39.694, 39.862, 39.708,39.788)  # latitudes of em27 locations
em_lons <- c(-86.300,-86.055,-86.004,-86.169,-86.173)  # longitudes of em27 locations


# Select footprint data file: The prefix will be added
# . e.g. "_ERA5_bw8_small"
foot_suffix <- "_NAMS_fp.nc"

############### INVENTORIES ######################
inventory_files  <- c(hestia='indy_methane.nc',
                      roads='indy_methane_roads.nc',
                      population='indy_methane_population.nc')

inventory_files  <- c(hestia='indy_methane.nc')


#Constants and Conversions:
to_ggyr <- 16.04*365.25*60*60*24*1e-9  #umol/m2s -> Gg/yr ...still need to multiply by total domain area in km2!

# using preprocessed data: apply_scaling = FALSE; using observations directly: TRUE
apply_scaling = FALSE  # what does this do??
# solve inversion for all measurement days together: invert_all_days = TRUE
invert_all_days = TRUE # also produces a nicer bar plot

use_transport_error = TRUE
t_error_directory <- "~/Desktop/INDY/inverse-modeling/indy/transport_error/"
t_error_suffix <- "NAMS_transport_error_hourly_25.rds"

error_method <- "posterior" #other option is "bootstrap"

#Parameters for Inversion:  -------------------------------------------------------------------------------------------------------------
sigma_sector_priors <- list("X1"=2,"X2"=1e-2,"X3"=1e-2,"X4"=1e-2)
sigma_obs_prior     <- 2e-3      # ppm  (2 ppb)
sigma_bkgd_prior    <- .01      # ppm  (10 ppb) 1e-2 
t_back              <- 9*60*60   # seconds: The timescale (e-folding rate) of background variability
bkgd_prior          <- 1.84      # ppm: prior background value

#Parameters for Bootstrap: --------------------------------------------------------------------------------------------------------------
sector_to_bootstrap <- "X1"
bs_x_err            <- 2e-4  # ppm:  (0.2 ppb) error with which to draw for bootstrap
bs_y_err            <- 2e-4  # ppm:  (0.2 ppb) error with which to draw for bootstrap
bootstrap_reps      <- 5000  # times to run each of the bootstraps

path_campaign <- dirname(rstudioapi::getSourceEditorContext()$path)

fig_directory <- paste0(path_campaign,"/figs")

if (!dir.exists(fig_directory)){
  dir.create(fig_directory)
  message('fig-folder created.')
}


source(paste0(path_inverse_modeling_engine,'inversion_2018.R'))

cbPalette <- c("#332288","#cc6677","#117733","#88ccee","#882255",
               "#44aa99","#999933","#aa4499")

cbPalette <- c("#cc6677","#332288","#ddcc77","#117733","#88ccee","#882255",
               "#44aa99","#999933","#aa4499")

ems <- c("ha","hb","kf","ma","pi")

p <- ggplot(out_df_total, aes(x=recep,color=em)) +
  geom_point(aes(y=ob*1000)) +
  geom_line(aes(y=y_hat*1000)) +
  geom_line( data = m$bk_df, aes(x=t, y=b_hat*1000),color="black",linetype="dashed") +
  scale_colour_manual(values=cbPalette, breaks=ems ) +
  theme_gray() +
  ylim(1832.5,1862.5) +
  theme(text = element_text(size=15) ) +
  labs(x="Time [UTC]",y=TeX("XCH$_4$  \\[ppb\\]") ,color="Sensor ID") +
  scale_x_datetime(breaks = date_breaks("4 hour"), labels = date_format("%H:%M"))

p

ggsave(
  "fig11_result.png",
  plot = p,
  scale = 1,
  width = 7,
  height = 5,
  units = c("in"),
  dpi = 300
)

ggplot(bars_df_total, aes(x=type,y=emission,fill=prior)) +
  theme_gray() + 
  geom_bar(position=position_dodge(), stat="identity",width=.8) +
  geom_errorbar(position=position_dodge(.8), aes(ymin=lower, ymax=upper),width=.2) +
  theme(text = element_text(size=15) ) +
  labs(x="Date",y=bquote("Total Diffuse"~CH[4]~"Emissions ["~mol~s^{-1}~"]"),title="",fill="Prior Distribution") +
  #geom_hline(yintercept = prior_totals[1]/8.0, linetype=2, color= cbPalette[1] , size=1.25) +
  #geom_hline(yintercept = prior_totals[1]/4.0, linetype=2, color= cbPalette[2], size = 1.25) +
  geom_hline(yintercept = prior_totals[1],     linetype=2, color= "black", size = 1.25) +
#scale_y_continuous(breaks=seq(-40,90,20))
  geom_hline(yintercept = 0) +
  scale_fill_manual(values=cbPalette )

ggsave( paste0( fig_directory,'/fig19_prior_distro_sensitivity.png') )


landfill_error <- sqrt( 14^2 + 5.28^2)
total_error <- sqrt( (prior_totals[1]*sigma_obs_pos[1])^2 + landfill_error^2 )*1.645

#mask marion county:
require(tigris)
marion_county       <- county_subdivisions("IN",county = "Marion")
marion_ras_stack    <- mask( ras_stack, marion_county)
marion_prior_totals <- cellStats(marion_ras_stack*raster::area(marion_ras_stack),"sum") #*to_ggyr    # Add up all the emissions and convert to GG/yr
sectors2 <- c("1B","6A","CG","6B")
names(marion_prior_totals) <- sectors2        

#mask city
woot <- metro_divisions("Indianapolis")
sector_dict1 <- c("X1"="1B","X2"="6A","X3"="CG","X4"="6B")



#get EPA:
foo <- load_epa(foot_file = foot_file, use_ipcc_sectors = T)
epa_ras_stack <- foo$ras_stack
epa_totals <- cellStats(epa_ras_stack*raster::area(epa_ras_stack),"sum")
epa_sectors <- foo$sectors
epa_marion_ras_stack <- mask( epa_ras_stack, marion_county)
epa_marion_totals <- cellStats(epa_marion_ras_stack*raster::area(epa_marion_ras_stack),"sum")
epa_marion_totals <- as.numeric(epa_marion_totals)

#get EDGAR:
foo <- load_edgar(foot_file = foot_file)
edgar_ras_stack <- foo$ras_stack
edgar_totals <- cellStats(edgar_ras_stack*raster::area(edgar_ras_stack),"sum")
edgar_sectors <- foo$sectors
edgar_marion_ras_stack <- mask( edgar_ras_stack, marion_county)
edgar_marion_totals <- cellStats(edgar_marion_ras_stack*raster::area(edgar_marion_ras_stack),"sum")
#county_shapes  <- readOGR('county_shape_files/tl_2018_us_county.shp')

#foo <- county_shapes[[ county_shapes$Name == "Marion"  ]]

#compare with other stuff:
inventory_df <- data.frame()

inventory_df <- rbind(inventory_df , data.frame( source  = "EDGAR" ,
                                                 domain = "Extended Domain",
                                                 sector = edgar_sectors , 
                                                 value =  edgar_totals, 
                                                 lower = NA,
                                                 upper = NA) )

inventory_df <- rbind(inventory_df , data.frame( source  = "EDGAR" ,
                                                 domain = "Marion County",
                                                 sector = edgar_sectors , 
                                                 value =  edgar_marion_totals, 
                                                 lower = NA,
                                                 upper = NA) )

inventory_df <- rbind(inventory_df , data.frame( source  = "EPA" ,
                                                 domain = "Extended Domain",
                                                 sector = epa_sectors , 
                                                 value =  epa_totals, 
                                                 lower = NA,
                                                 upper = NA) )

inventory_df <- rbind(inventory_df , data.frame( source  = "EPA" ,
                                                 domain = "Marion County",
                                                 sector = epa_sectors , 
                                                 value =  epa_marion_totals, 
                                                 lower = NA,
                                                 upper = NA) )

inventory_df <- rbind(inventory_df , data.frame( source = "Prior",
                                                 domain = "Marion County",
                                                 sector = sectors2,
                                                 value = marion_prior_totals,
                                                 lower = NA,
                                                 upper = NA) )

inventory_df <- rbind(inventory_df , data.frame( source = "Prior",
                                                 domain = "Extended Domain",
                                                 sector = sectors2,
                                                 value = prior_totals,
                                                 lower = NA,
                                                 upper = NA) )

inventory_df <- rbind(inventory_df , data.frame( source = "Tower",
                                                 domain = "Extended Domain",
                                                 sector= NA,
                                                 value = 81/to_ggyr, 
                                                 lower = (81-11)/to_ggyr,
                                                 upper = (81+11)/to_ggyr))

inventory_df <- rbind(inventory_df , data.frame( source = "Tower",
                                                 domain = "Marion County",
                                                 sector= NA,
                                                 value = 107,
                                                 lower = NA,
                                                 upper = NA))

inventory_df <- rbind(inventory_df , data.frame( source  = "This Study" ,
                                                 domain = "Extended Domain",
                                                 sector = sectors2 , 
                                                 value = prior_totals*m$x_hat[1:length(sectors)], 
                                                 lower = sum(prior_totals*m$x_hat[1:length(sectors)]) - total_error,
                                                 upper = sum(prior_totals*m$x_hat[1:length(sectors)]) + total_error))


inventory_df <- rbind(inventory_df , data.frame( source  = "This Study" ,
                                                 domain = "Marion County",
                                                 sector = sectors2 , 
                                                 value = marion_prior_totals*m$x_hat[1:length(sectors)], 
                                                 lower = NA,
                                                 upper = NA) )

inventory_df <- rbind(inventory_df , data.frame( source = "Aircraft, 2015",
                                                 domain = "Other",
                                                 sector= NA,
                                                 value = 135, 
                                                 lower = 135 - (1.645*58),
                                                 upper = 135 + (1.645*58) ) )

inventory_df <- rbind(inventory_df , data.frame( source = "Aircraft, 2016",
                                                 domain = "Other",
                                                 sector= NA,
                                                 value = 41/to_ggyr, 
                                                 lower = (41-12)/to_ggyr,
                                                 upper = (41+12)/to_ggyr))

foo <- inventory_df
sector_dict <- c("1A"="1A: Fuel Combustion",
                 "1B"="1B: Fugitive Emissions (diffuse)",
                 "4A"="4A: Enteric Fermentation",
                 "4B"="4B: Manure Management",
                 "6A"="6A: Landfills",
                 "6B"="6B: Wastewater Treatment",
                 "CG"="CG: Pipeline City Gate")
foo$sector <- sector_dict[ foo$sector ]


ggplot( foo , aes(x=source,y=value,fill=sector)) +
  theme_gray() + 
  geom_col(color="black") +
  geom_errorbar(aes(ymin=lower,ymax=upper), width=.2) +
  facet_wrap(~domain,ncol = 2, scales = "free_x") +
  labs(x="",
       y=bquote("Total C"~H[4]~"Emission Rate ["~mol~s^{-1}~"]"),
       fill="Sector") +
  theme(text = element_text(size=15) ) +
  theme(legend.position = c(0.75, 0.25)) 

ggsave( paste0( fig_directory,'/fig18_results_compare.png') )


require(lubridate)

sdf <- m$df
sdf$ob <- 1e3*(sdf$ob - sdf$y_bk)
sdf$y_hat <- 1e3*(sdf$y_hat - sdf$y_bk)
sdf$recep <- round_date( sdf$recep, "1 hour")
#foo$recep <- as.POSIXct( round(foo$recep,"hours") )
sdf      <- aggregate(cbind(ob,y_hat,y_bk) ~ em+recep+date , sdf, "mean")
names(sdf) <- c("recep","em","date","ob","y_hat","y_bk")

#date:
#foo <- m$df
#foo$ob <- 1e3*(foo$ob - foo$y_bk)
#foo$y_hat <- 1e3*(foo$y_hat - foo$y_bk)
#woot2      <- aggregate(cbind(ob,y_hat,y_bk) ~ date , foo, "mean")
#woot3   <- aggregate(cbind(ob,y_hat,y_bk) ~ date , foo, "sd")
#names(woot2) <- c("date","ob","y_hat","y_bk")
#names(woot3) <- c("date","ob","y_hat","y_bk")

#model bins:
#foo <- woot
#woot$ob <- 1e3*(woot$ob - foo$y_bk)
#foo$y_hat <- 1e3*(foo$y_hat - foo$y_bk)
sdf2 <- sdf

bin_width = .5
sdf$y_hat <- ( floor( sdf$y_hat/bin_width)*bin_width ) + bin_width/2
sdf <- sdf[ sdf$y_hat <= 2, ]

woot2      <- aggregate(ob ~ y_hat , sdf, "mean")
woot3   <- aggregate(ob ~ y_hat , sdf, "sd")

names(woot2) <- c("y_hat","ob")
names(woot3) <- c("y_hat","ob")

woot2$sd <- woot3$ob
woot2$min <- woot2$ob - woot3$ob
woot2$max <- woot2$ob + woot3$ob
woot2$n <- 0
for( bin in unique(sdf$y_hat) ){
  woot2$n[ woot2$y_hat == bin ] <- length( sdf$ob[ sdf$y_hat == bin ])
}

woot2$s_n <- woot2$sd/sqrt(woot2$n)
woot2$min <- woot2$ob - woot2$s_n
woot2$max <- woot2$ob + woot2$s_n


ggplot( sdf2, aes(x=ob,y=y_hat)) + 
  geom_point(aes(color=date), alpha=.8,stroke=0,size=2) +
  geom_point(data=woot2,shape=15,size=3,color="black",alpha=.6) +
  geom_errorbarh(data=woot2,aes(xmin=min,xmax=max),alpha=.5,height=.5) +
  theme_gray() +
  scale_color_manual(values=cbPalette) +
  ylim(0,3) +
  scale_x_continuous(breaks=seq(-3,3,1),limits = c(-3,3)) +
  geom_abline(slope=1,intercept=0) +
  coord_equal() +
  theme( panel.background =element_rect(fill="lightgray")) +
  labs(x="Observed Enhancement [ppb]", y="Model Enhancement [ppb]", color="Date")
  

s1 = woot2$s_n[1]
s2 = woot2$s_n[3]
n1 = woot2$n[1]
n2 = woot2$n[3]
x1 = woot2$ob[1]
x2 = woot2$ob[3]

t_stat <- (x2 - x1) / sqrt( s1^2 + s2^2 )

dof <- (s1^2 + s2^2) / ( ( (s1^2)/(n1-1) ) + ((s2^2)/(n2-1)) )

pt(t_stat, dof, lower.tail = FALSE) + pt(-1*t_stat, dof)
